Setup

Load R libraries

library(data.table)
library(ggplot2)
library(ggpubr)
library(tidyr)
library(limma)
library(biomaRt)
library(fgsea)
library(goseq)

theme_set(theme_classic())

cell_type_name = params$cell_type_name
graph_weight = params$graph_weight

cell_type_name
## [1] "cd8"
graph_weight
## [1] "2.0"

Check enrichment of gene sets

Read in gene info and gene set assignments

file_tag = sprintf("%s_BL_%s", cell_type_name, graph_weight)

assayed_genes = scan(sprintf("output/gene_list_%s.txt", file_tag), 
                     what = character(), sep="\n")

gene_sets = scan(sprintf("output/name_s_%s.txt", file_tag), 
                 what = character(), sep="\n")

gene_sets = sapply(gene_sets, strsplit, USE.NAMES=FALSE, split=",")
n_genes   = sapply(gene_sets, length)
names(n_genes) = NULL
summary(n_genes)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   16.00   17.00   18.00   17.85   18.00   19.00
length(n_genes)
## [1] 40
sort(n_genes)
##  [1] 16 17 17 17 17 17 17 17 17 17 17 17 18 18 18 18 18 18 18 18 18 18 18 18 18
## [26] 18 18 18 18 18 18 18 18 19 19 19 19 19 19 19

Find gene symbols

Find gene symbols from bioMart.

All the gene symbols that can be found in bioMart are consistent with what we have. So no need to run it.

ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")

gene_BM = getBM(attributes = c("hgnc_symbol", "external_gene_name"), 
                filters = "external_gene_name", 
                values = assayed_genes, 
                mart = ensembl)
length(assayed_genes)
dim(gene_BM)
gene_BM[1:2,]

table(assayed_genes %in% gene_BM$external_gene_name)

t1 = table(gene_BM$external_gene_name)
dup = names(t1)[t1 > 1]
gene_BM[gene_BM$external_gene_name %in% dup,]

table(gene_BM$hgnc_symbol == gene_BM$external_gene_name)
w2kp = which(gene_BM$hgnc_symbol != gene_BM$external_gene_name)
gene_BM[w2kp,]

Find gene symbols using the alias2Symbol function from limma.

a2s = rep(NA, length(assayed_genes))
for(i in 1:length(assayed_genes)){
  gi = assayed_genes[i]
  ai = alias2Symbol(gi)
  if(length(ai) > 1){
    print(gi)
    print(ai)
  }
  a2s[i] = ai[1]
}
## [1] "QARS"
## [1] "EPRS1" "QARS1"
## [1] "SEPT2"
## [1] "SEPTIN6" "SEPTIN2"
table(is.na(a2s))
## 
## FALSE  TRUE 
##  1607    42
table(a2s == assayed_genes, useNA = 'ifany')
## 
## FALSE  TRUE  <NA> 
##    42  1565    42
gene_info = data.table(sym_in_data = assayed_genes, sym_limma = a2s)

gene_info[sym_in_data != sym_limma,]
##     sym_in_data sym_limma
##  1:    C10orf91 LINC02870
##  2:    C12orf10      MYG1
##  3:    C12orf45  NOPCHAP1
##  4:     C6orf48    SNHG32
##  5:     C6orf99 LINC02901
##  6:    CXorf40A     EOLA1
##  7:     CXorf57      RADX
##  8:     FAM102A     EEIG1
##  9:     FAM173A    ANTKMT
## 10:     FAM213B    PRXL2B
## 11:       H2AFX      H2AX
## 12:   HIST1H2AG    H2AC11
## 13:   HIST1H2BK    H2BC12
## 14:   HIST1H2BN    H2BC15
## 15:    HIST1H3A      H3C1
## 16:    HIST1H3H     H3C10
## 17:    HIST1H4C      H4C3
## 18:   HIST2H2BF    H2BC18
## 19:    KIAA0391     PRORP
## 20:        QARS     EPRS1
## 21:       SEPT6   SEPTIN6
## 22:       ARNTL     BMAL1
## 23:    C12orf65     MTRFR
## 24:    C16orf72   HAPSTR1
## 25:      CCDC84   CENATAC
## 26:      DOPEY2     DOP1B
## 27:     FAM126B     HYCC2
## 28:    FAM160B1    FHIP2A
## 29:        H1FX     H1-10
## 30:       H2AFJ      H2AJ
## 31:       HEXDC      HEXD
## 32:    HIST1H1C      H1-2
## 33:    HIST1H1D      H1-3
## 34:    HIST1H1E      H1-4
## 35:    KIAA1109     BLTP1
## 36:    KIAA1551     RESF1
## 37:        MKL1     MRTFA
## 38:       NARFL     CIAO3
## 39:       SEPT2   SEPTIN6
## 40:      TARSL2     TARS3
## 41:      TMEM8A     PGAP6
## 42:       WDR60   DYNC2I1
##     sym_in_data sym_limma
gene_info[, gene_symbol := sym_in_data]
gene_info[which(sym_in_data != sym_limma), gene_symbol := sym_limma]

dim(gene_info)
## [1] 1649    3
gene_info[1:5,]
##    sym_in_data sym_limma gene_symbol
## 1:      ABLIM1    ABLIM1      ABLIM1
## 2:  AC004687.1      <NA>  AC004687.1
## 3:  AC004854.2      <NA>  AC004854.2
## 4:  AC007384.1      <NA>  AC007384.1
## 5:  AC007952.4      <NA>  AC007952.4
t1 = table(gene_info$gene_symbol)
table(t1)
## t1
##    1    2 
## 1647    1
gene_info[gene_symbol %in% names(t1)[t1 == 2],]
##    sym_in_data sym_limma gene_symbol
## 1:       SEPT6   SEPTIN6     SEPTIN6
## 2:       SEPT2   SEPTIN6     SEPTIN6
gene_info[sym_in_data == "HIST1H2BC", gene_symbol:="H2BC4"]
gene_info[sym_in_data == "SEPT6", gene_symbol:="SEPTIN6"]
gene_info[sym_in_data == "SEPT2", gene_symbol:="SEPTIN2"]

Prepare gene set information

Gene set annotations (by gene symbols) were downloaded from MSigDB website.

gmtfile = list()
gmtfile[["reactome"]] = "../Annotation/c2.cp.reactome.v2023.2.Hs.symbols.gmt"
gmtfile[["go_bp"]]    = "../Annotation/c5.go.bp.v2023.2.Hs.symbols.gmt"
gmtfile[["immune"]]   = "../Annotation/c7.all.v2023.2.Hs.symbols.gmt"

pathways = list()
for(k1 in names(gmtfile)){
  pathways[[k1]] = gmtPathways(gmtfile[[k1]])
}

names(pathways)
## [1] "reactome" "go_bp"    "immune"
sapply(pathways, length)
## reactome    go_bp   immune 
##     1692     7647     5219

Filter gene sets for size between 10 and 500.

lapply(pathways, function(v){
  quantile(sapply(v, length), probs = seq(0, 1, 0.1), na.rm = TRUE)
})
## $reactome
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    7.0    9.0   12.0   17.0   23.0   31.0   44.0   71.8  120.9 1463.0 
## 
## $go_bp
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    6.0    8.0   10.0   14.0   19.0   29.0   46.0   80.8  183.0 1966.0 
## 
## $immune
##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90% 100% 
##    5  162  193  197  199  199  200  200  200  200 1992
for(k1 in names(pathways)){
  p1 = pathways[[k1]]
  pathways[[k1]] = p1[sapply(p1, length) %in% 10:500]
}

Conduct enrichment analysis

dim(gene_info)
## [1] 1649    3
max_n2kp = 10

goseq_res = NULL

for(k in 1:length(gene_sets)){
  if(length(gene_sets[[k]]) < 10) { next }
  
  print(k)
  set_k = paste0("set_", k)
  print(gene_sets[[k]])
  
  genes = gene_info$sym_in_data %in% gene_sets[[k]]
  names(genes) = gene_info$gene_symbol
  table(genes)
  
  pwf = nullp(genes, "hg38", "geneSymbol")

  for(k1 in names(pathways)){
    p1 = pathways[[k1]]
    res1 = goseq(pwf, "hg38", "geneSymbol", 
                 gene2cat=goseq:::reversemapping(p1))
    res1$FDR  = p.adjust(res1$over_represented_pvalue, method="BH")
    
    nD = sum(res1$FDR < 0.1)
    
    if(nD > 0){
      res1 = res1[order(res1$FDR),][1:min(nD, max_n2kp),]
      res1$category = gsub("REACTOME_|GOBP_", "", res1$category)
      res1$category = gsub("_", " ", res1$category)
      res1$category = tolower(res1$category)
      res1$category = substr(res1$category, start=1, stop=81)
      goseq_res[[set_k]][[k1]] = res1
    }
  }
}
## [1] 1
##  [1] "ARHGEF3"   "ARL4C"     "BROX"      "COL6A2"    "COL6A3"    "DMTF1"    
##  [7] "ELMOD3"    "FAM169A"   "GPR174"    "IFITM2"    "LAIR2"     "LINC02256"
## [13] "PIK3CD"    "PIK3R5"    "RUBCN"     "SLCO3A1"   "SUSD6"     "TRGV10"   
## [19] "XCL1"

## [1] 2
##  [1] "ISCA1"    "LCP2"     "NSUN6"    "NT5E"     "SLC27A5"  "TRAT1"   
##  [7] "AFF1"     "AFF4"     "COX19"    "ITK"      "LONP2"    "MYO9A"   
## [13] "PNPLA8"   "TBC1D14"  "TRAPPC11" "TRAPPC8"  "TRAPPC9"  "ZNF557"

## [1] 3
##  [1] "CAMK4"    "CCNB1IP1" "CMTM7"    "EPHX2"    "HDHD3"    "HIBADH"  
##  [7] "LBH"      "MZT2A"    "PCMTD2"   "PDE7A"    "SNHG15"   "TC2N"    
## [13] "THEM4"    "ZFAS1"    "CD46"     "FAM126B"  "KLF3"     "VPS13A"

## [1] 4
##  [1] "CREBL2"   "DYRK4"    "IGKV3-20" "MZF1"     "NBPF14"   "TBCCD1"  
##  [7] "TRAV13-1" "TRAV8-3"  "TRGV5"    "TSPYL4"   "CYTOR"    "IQCG"    
## [13] "PATL2"    "PYROXD1"  "SENP7"    "TRIM38"   "TSPAN32"  "ZNF683"

## [1] 5
##  [1] "CCR7"       "TRABD2A"    "AC116407.2" "DOCK10"     "HRH2"      
##  [6] "LINC02446"  "MX2"        "OAS2"       "OXNAD1"     "PCED1B"    
## [11] "S100A12"    "TRAV12-3"   "TRAV19"     "TRAV27"     "TRAV4"     
## [16] "TRAV9-2"    "TRBV11-2"   "TRBV4-2"

## [1] 6
##  [1] "BBS9"     "COG5"     "DTHD1"    "KLRF1"    "LRRC23"   "MAP3K2"  
##  [7] "MTRNR2L8" "NT5C3B"   "TOX"      "TRAV3"    "TRBV7-2"  "TRGV8"   
## [13] "ZNF862"   "COG7"     "MCTP2"    "MYBL1"    "TRGC2"

## [1] 7
##  [1] "CST3"    "APOL6"   "CD226"   "DIAPH2"  "DOCK11"  "EPSTI1"  "ERAP2"  
##  [8] "GNPTAB"  "GPR141"  "LILRB1"  "MIGA1"   "OGA"     "PARP15"  "PTPRJ"  
## [15] "ST6GAL1" "TTC17"   "UNC13D"  "ZNF493"

## [1] 8
##  [1] "ARMH1"     "BTG1"      "IL6R"      "SLC2A3"    "TMIGD2"    "ARHGAP45" 
##  [7] "DDIT4"     "HEXDC"     "IL6ST"     "ITM2A"     "OSM"       "PPP4R3B"  
## [13] "PSMA3-AS1" "STK17B"    "TARSL2"    "TENT5C"    "Z93930.2"  "ZC3H7B"

## [1] 9
##  [1] "ALOX5AP"  "FCRL3"    "GCSAM"    "GTF3A"    "ICAM3"    "MFNG"    
##  [7] "TRBC1"    "ADGRE5"   "ARHGAP30" "CCL4"     "CD38"     "CTSW"    
## [13] "FCRL6"    "FGFBP2"   "SLA2"     "TBC1D2B"  "TRBC2"    "ZAP70"   
## [19] "ZNF276"

## [1] 10
##  [1] "ANKRD49"  "ATAD2B"   "CCNH"     "CES1"     "CLUH"     "FAM13B"  
##  [7] "GPATCH2L" "INO80D"   "MICAL2"   "MX1"      "NAA25"    "NARFL"   
## [13] "NLRC5"    "PARP9"    "RHOH"     "STAT4"    "SYNRG"

## [1] 11
##  [1] "AC027644.3" "AC087623.3" "AC119396.1" "AC245297.3" "ARRDC2"    
##  [6] "IER5"       "KLF10"      "KMT2E-AS1"  "LINC00649"  "RAB33B"    
## [11] "SDR42E2"    "SESN2"      "TCP11L2"    "WARS2"      "MARF1"     
## [16] "RUFY2"      "SZT2"       "XIST"

## [1] 12
##  [1] "ASL"     "GALNT11" "NXT2"    "ASCL2"   "BTN3A1"  "FRYL"    "GNLY"   
##  [8] "GON4L"   "HSH2D"   "INPP4A"  "PHF14"   "POLH"    "RALGAPB" "SEMA4D" 
## [15] "TAOK1"   "TAOK3"   "USP16"   "ZEB2"

## [1] 13
##  [1] "AL118516.1"  "AL451085.1"  "AL627171.1"  "ATP2B1-AS1"  "HELQ"       
##  [6] "INPP4B"      "LINC02273"   "NUP58"       "PRR7"        "PTGER4"     
## [11] "RGS1"        "AC016831.7"  "CEMIP2"      "CRYBG1"      "GDPD5"      
## [16] "PRR5L"       "THUMPD3-AS1" "TMEM131L"

## [1] 14
##  [1] "RELT"      "B4GALT1"   "CAPNS1"    "CROCC"     "CST7"      "CX3CR1"   
##  [7] "DDHD1"     "IL2RG"     "IRAK4"     "LINC02384" "MBP"       "RNF213"   
## [13] "SAMD9L"    "SPN"       "SYNE1"     "UBE2H"     "UBR2"

## [1] 15
##  [1] "CYB5D2"  "INTS6"   "INTS8"   "PASK"    "PITPNC1" "TMEM134" "TRG-AS1"
##  [8] "CCDC88C" "EFHD2"   "GZMA"    "GZMH"    "NEAT1"   "PEX1"    "PEX26"  
## [15] "PRR14L"  "SSH1"    "STK10"

## [1] 16
##  [1] "AC004854.2" "AC083798.2" "AK5"        "CD40LG"     "CITED2"    
##  [6] "EI24"       "FAM213B"    "FXYD7"      "MYLIP"      "NR1D1"     
## [11] "PIK3IP1"    "PPP1R15B"   "RGL4"       "SLC38A2"    "TESPA1"    
## [16] "WDR86"      "WSB1"       "ARRDC3"     "ITPR2"

## [1] 17
##  [1] "ABCA5"      "AC020659.1" "ADHFE1"     "CCDC84"     "CLEC16A"   
##  [6] "CPPED1"     "DDX60L"     "GABPB2"     "GCN1"       "HECTD4"    
## [11] "ISG20"      "LAG3"       "ODF3B"      "PDZD4"      "TMEM127"   
## [16] "TRAC"       "VTI1A"      "ZCCHC2"

## [1] 18
##  [1] "AHCTF1"   "CAPN15"   "CCDC112"  "DOPEY2"   "H6PD"     "HLA-DPB1"
##  [7] "HLA-DQA1" "KLHDC4"   "N4BP1"    "PNPLA6"   "PREX1"    "PTGDS"   
## [13] "RREB1"    "SLC38A10" "TBC1D9B"  "ZNF292"   "ZNF318"

## [1] 19
##  [1] "C12orf45" "CLDND1"   "CXXC5"    "GSTM1"    "ITGAE"    "KCTD7"   
##  [7] "KLRK1"    "MSC"      "NCR1"     "PAPSS1"   "RHOC"     "SESN1"   
## [13] "ARAP2"    "GALNT3"   "HIVEP3"   "NLRC3"    "SLF2"     "ZDHHC5"

## [1] 20
##  [1] "HMBOX1"  "KIF9"    "ABCA7"   "ANKRD36" "AP3B1"   "AP3M2"   "ARHGEF9"
##  [8] "C2CD3"   "C5orf24" "GPHN"    "PIP4K2B" "RIPOR2"  "UVRAG"   "VPS18"  
## [15] "VPS39"   "WDR60"   "XPO6"    "ZBTB20"  "ZNF407"

## [1] 21
##  [1] "GADD45B"     "TRBV6-1"     "AKNA"        "DENND4B"     "FAM78A"     
##  [6] "GPR132"      "HPS4"        "MT2A"        "NBEAL2"      "NUTM2B-AS1" 
## [11] "PCNX1"       "PUM3"        "SEC14L1"     "SLC16A1-AS1" "TRANK1"     
## [16] "TRBV2"       "TRGV4"       "TTC38"       "ZNF83"

## [1] 22
##  [1] "AC044849.1" "APMAP"      "C10orf91"   "MATK"       "BICRAL"    
##  [6] "ERBIN"      "GZMB"       "ITGAL"      "KIAA2026"   "MYO1G"     
## [11] "NKG7"       "RAP1GAP2"   "RASGRP1"    "RNF125"     "RNF19A"    
## [16] "SPON2"      "SRGN"       "VPS13C"

## [1] 23
##  [1] "AMD1"     "CCNL1"    "GLTP"     "NT5DC1"   "RGCC"     "RSRP1"   
##  [7] "SERINC5"  "SNHG12"   "ANKRD36B" "ANKRD36C" "CHD9"     "EML4"    
## [13] "ENOSF1"   "NFATC3"   "PARP4"    "SLFN12L"  "TEP1"     "TTC14"   
## [19] "ZNF708"

## [1] 24
##  [1] "AC008555.5" "AL135791.1" "BTG2"       "CXorf40A"   "LIME1"     
##  [6] "LINC00402"  "MCUB"       "PDCD4-AS1"  "PGGHG"      "PRAG1"     
## [11] "TNFRSF25"   "TRAV14DV4"  "ZNF749"     "CARD16"     "GK5"       
## [16] "MIAT"       "PCSK7"      "SPATA13"

## [1] 25
##  [1] "CSRNP1"  "ID1"     "TXK"     "ZC3H12A" "ARID5B"  "CISH"    "EHBP1L1"
##  [8] "FGL2"    "GBP1"    "GBP5"    "KLF6"    "MKL1"    "NFKBIZ"  "PIM1"   
## [15] "PLAC8"   "RIC3"    "SETD5"

## [1] 26
##  [1] "ABR"      "ARAP1"    "C16orf72" "CHD6"     "CREBZF"   "DUS1L"   
##  [7] "FAM53B"   "HECA"     "HERC3"    "HERC6"    "INPP5D"   "LY6E"    
## [13] "PARP11"   "RAB27B"   "SYTL3"    "TTC16"    "XAF1"     "ZNF652"

## [1] 27
##  [1] "CD27"     "CD28"     "COA1"     "COQ8A"    "CRLF3"    "CYB561A3"
##  [7] "GZMK"     "IER3"     "KLRG1"    "LEPROTL1" "RCSD1"    "RTRAF"   
## [13] "SLC38A1"  "STK17A"   "TMEM107"  "TMEM42"   "ZFAND1"   "RFWD3"

## [1] 28
##  [1] "AIF1"      "ANXA2R"    "BEX4"      "CHRM3-AS2" "EPS8"      "RCAN3"    
##  [7] "SELL"      "TCEA3"     "CASP10"    "CHD1"      "CYTH1"     "MIDN"     
## [13] "MSI2"      "RNF157"    "SCRN3"     "SOS1"      "VCAN"

## [1] 29
##  [1] "AOAH"     "CARHSP1"  "EOMES"    "GIMAP1"   "GSTM4"    "ADGRG1"  
##  [7] "DGKD"     "IGKV3-15" "IRF9"     "ITGAM"    "KIR2DL3"  "MYO1F"   
## [13] "NCKAP1L"  "NECAP1"   "PRKCH"    "SLFN5"    "TUT7"     "YPEL1"

## [1] 30
##  [1] "AC007384.1" "AC015982.1" "CCL4L2"     "MZF1-AS1"   "RETREG1"   
##  [6] "ABCC10"     "AC092683.1" "AP005482.1" "BMT2"       "FAM133B"   
## [11] "GRK2"       "MINDY2"     "POLR2J3-1"  "SIDT1"      "THAP5"     
## [16] "TRDC"       "TRDV1"      "XCL2"       "ZNF808"

## [1] 31
##  [1] "AC004687.1" "AC007952.4" "AC025164.1" "AC025171.3" "AC087239.1"
##  [6] "AC245014.3" "AL121944.1" "AL139246.5" "CRTAM"      "ILF3-DT"   
## [11] "JAML"       "NR4A3"      "SNHG8"      "SNHG9"      "TRAV12-2"  
## [16] "TRBV3-1"    "TRBV6-2"    "TRBV7-9"

## [1] 32
##  [1] "ADCY7"    "BCL9L"    "CELF2"    "CHST12"   "ETNK1"    "GALNT2"  
##  [7] "KCNAB2"   "KIAA1109" "KIAA1551" "KLF2"     "MPPE1"    "PLEK"    
## [13] "PRDM2"    "PTPN7"    "SUSD1"    "UGGT1"    "WDTC1"    "ZBP1"

## [1] 33
##  [1] "CD84"        "CMC1"        "FAM118A"     "GLA"         "HIKESHI"    
##  [6] "HLA-DMB"     "KIR3DL2"     "KLRC3"       "LETMD1"      "MAPRE2"     
## [11] "SH2D1A"      "TRAV38-2DV8" "TRBV6-5"     "TRGV9"       "MCOLN2"     
## [16] "TMEM181"

## [1] 34
##  [1] "GATA3"    "GPR183"   "MAP3K8"   "NR4A2"    "SLC4A4"   "TIGIT"   
##  [7] "ARHGAP10" "CSNK1G2"  "FAM160B1" "GPRIN3"   "LPCAT1"   "LRRC8A"  
## [13] "PARP14"   "RAPGEF1"  "SLC20A1"  "TBX21"    "TGFBR3"   "ZFAND3"

## [1] 35
##  [1] "ALKBH7"  "ARL4A"   "ATP5F1A" "C6orf48" "EFCAB2"  "FAM173A" "FCMR"   
##  [8] "MPST"    "NOSIP"   "PTRHD1"  "RGS10"   "ELMO1"   "IQGAP2"  "LPIN1"  
## [15] "PDE4B"   "PDE4D"   "TOB1"    "TUT4"

## [1] 36
##  [1] "AC012645.3" "AC016405.3" "AC020911.2" "AC083880.1" "AC091271.1"
##  [6] "AC103591.3" "AF213884.3" "AL357060.1" "ARF4-AS1"   "C6orf99"   
## [11] "CSKMT"      "HIPK1-AS1"  "INTS6L"     "KCNQ1OT1"   "LINC01465" 
## [16] "NPIPB4"     "OSER1-DT"   "Z93241.1"

## [1] 37
##  [1] "BNIP3L"  "MXI1"    "NUAK2"   "PDE3B"   "GALNT10" "HIPK1"   "IFI44L" 
##  [8] "MBD5"    "NRDC"    "PPM1K"   "RASA3"   "RLF"     "SETX"    "TMEM8A" 
## [15] "TRBV7-6" "VPS13B"  "VPS13D"

## [1] 38
##  [1] "AL138963.3" "IGLV1-44"   "LRRN3"      "LST1"       "MATR3-1"   
##  [6] "NOCT"       "THAP9-AS1"  "TRAV1-2"    "TRAV21"     "TRAV5"     
## [11] "TRAV8-4"    "TRBV20-1"   "TRBV28"     "TRBV9"      "TRGV7"     
## [16] "VAMP7"      "IFI27"

## [1] 39
##  [1] "C12orf10"     "COQ7"         "CXorf57"      "EPB41L4A-AS1" "FAM102A"     
##  [6] "HIST1H3H"     "IFRD1"        "LDLRAP1"      "LTB"          "NELL2"       
## [11] "TCF7"         "TMEM204"      "ZNF10"        "ZNF575"       "MAPK8IP3"    
## [16] "USP34"        "ZBTB40"

## [1] 40
##  [1] "AL136454.1" "BX284668.6" "TRAV8-2"    "DDX3Y"      "EIF1AY"    
##  [6] "ETFDH"      "KDM5D"      "MAF"        "MTERF2"     "RPAP1"     
## [11] "RPS4Y1"     "SBNO2"      "TRAV17"     "TTTY15"     "UTY"       
## [16] "ZMIZ2"      "ZNF236"

for(n1 in names(goseq_res)){
  k = as.numeric(gsub("set_", "", n1))
  print(n1)
  print(gene_sets[[k]])
  print(goseq_res[[n1]])

}
## [1] "set_1"
##  [1] "ARHGEF3"   "ARL4C"     "BROX"      "COL6A2"    "COL6A3"    "DMTF1"    
##  [7] "ELMOD3"    "FAM169A"   "GPR174"    "IFITM2"    "LAIR2"     "LINC02256"
## [13] "PIK3CD"    "PIK3R5"    "RUBCN"     "SLCO3A1"   "SUSD6"     "TRGV10"   
## [19] "XCL1"     
## $reactome
##                                                         category
## 159                  collagen biosynthesis and modifying enzymes
## 160                                 collagen chain trimerization
## 590                                           ncam1 interactions
## 72  assembly of collagen fibrils and other multimeric structures
## 162                                           collagen formation
##     over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 159            9.873335e-05                1.0000000          2        2
## 160            9.873335e-05                1.0000000          2        2
## 590            2.945125e-04                0.9999992          2        3
## 72             2.946069e-04                0.9999992          2        3
## 162            2.946069e-04                0.9999992          2        3
##            FDR
## 159 0.05741344
## 160 0.05741344
## 590 0.06852556
## 72  0.06852556
## 162 0.06852556
## 
## [1] "set_2"
##  [1] "ISCA1"    "LCP2"     "NSUN6"    "NT5E"     "SLC27A5"  "TRAT1"   
##  [7] "AFF1"     "AFF4"     "COX19"    "ITK"      "LONP2"    "MYO9A"   
## [13] "PNPLA8"   "TBC1D14"  "TRAPPC11" "TRAPPC8"  "TRAPPC9"  "ZNF557"  
## $reactome
##                          category over_represented_pvalue
## 748 rab regulation of trafficking            4.358593e-05
##     under_represented_pvalue numDEInCat numInCat        FDR
## 748                0.9999989          4       15 0.05069043
## 
## $go_bp
##               category over_represented_pvalue under_represented_pvalue
## 4649   vesicle coating            5.212310e-06                1.0000000
## 4658 vesicle targeting            1.293776e-05                0.9999999
## 4661 vesicle tethering            1.293776e-05                0.9999999
##      numDEInCat numInCat        FDR
## 4649          3        4 0.02023034
## 4658          3        5 0.02023034
## 4661          3        5 0.02023034
## 
## [1] "set_8"
##  [1] "ARMH1"     "BTG1"      "IL6R"      "SLC2A3"    "TMIGD2"    "ARHGAP45" 
##  [7] "DDIT4"     "HEXDC"     "IL6ST"     "ITM2A"     "OSM"       "PPP4R3B"  
## [13] "PSMA3-AS1" "STK17B"    "TARSL2"    "TENT5C"    "Z93930.2"  "ZC3H7B"   
## $reactome
##                           category over_represented_pvalue
## 478 interleukin 6 family signaling            1.531729e-05
##     under_represented_pvalue numDEInCat numInCat        FDR
## 478                0.9999999          3        8 0.01781401
## 
## [1] "set_9"
##  [1] "ALOX5AP"  "FCRL3"    "GCSAM"    "GTF3A"    "ICAM3"    "MFNG"    
##  [7] "TRBC1"    "ADGRE5"   "ARHGAP30" "CCL4"     "CD38"     "CTSW"    
## [13] "FCRL6"    "FGFBP2"   "SLA2"     "TBC1D2B"  "TRBC2"    "ZAP70"   
## [19] "ZNF276"  
## $go_bp
##                                                                category
## 135                         antigen receptor mediated signaling pathway
## 1132 immune response regulating cell surface receptor signaling pathway
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 135             1.435375e-06                0.9999999          7       62
## 1132            1.060749e-05                0.9999994          7       83
##              FDR
## 135  0.006733346
## 1132 0.024879875
## 
## [1] "set_20"
##  [1] "HMBOX1"  "KIF9"    "ABCA7"   "ANKRD36" "AP3B1"   "AP3M2"   "ARHGEF9"
##  [8] "C2CD3"   "C5orf24" "GPHN"    "PIP4K2B" "RIPOR2"  "UVRAG"   "VPS18"  
## [15] "VPS39"   "WDR60"   "XPO6"    "ZBTB20"  "ZNF407" 
## $reactome
##                           category over_represented_pvalue
## 895 sars cov 2 modulates autophagy            1.133752e-06
##     under_represented_pvalue numDEInCat numInCat         FDR
## 895                        1          3        3 0.001318553
## 
## $go_bp
##                                     category over_represented_pvalue
## 4354                  snare complex assembly            1.330716e-06
## 4657                    vesicle organization            2.016482e-06
## 2256               organelle membrane fusion            8.249263e-06
## 2254                        organelle fusion            2.042630e-05
## 1423                         membrane fusion            2.072087e-05
## 4454 synaptic vesicle cytoskeletal transport            1.298258e-04
## 3894    regulation of snare complex assembly            1.337203e-04
##      under_represented_pvalue numDEInCat numInCat         FDR
## 4354                1.0000000          3        3 0.004729658
## 4657                0.9999999          6       38 0.004729658
## 2256                0.9999999          4       13 0.012899098
## 2254                0.9999996          4       16 0.019440325
## 1423                0.9999996          4       16 0.019440325
## 4454                1.0000000          2        2 0.089611692
## 3894                1.0000000          2        2 0.089611692
## 
## [1] "set_22"
##  [1] "AC044849.1" "APMAP"      "C10orf91"   "MATK"       "BICRAL"    
##  [6] "ERBIN"      "GZMB"       "ITGAL"      "KIAA2026"   "MYO1G"     
## [11] "NKG7"       "RAP1GAP2"   "RASGRP1"    "RNF125"     "RNF19A"    
## [16] "SPON2"      "SRGN"       "VPS13C"    
## $reactome
##            category over_represented_pvalue under_represented_pvalue numDEInCat
## 754 rap1 signalling            6.075026e-05                        1          2
##     numInCat        FDR
## 754        2 0.07065255
## 
## $immune
##                                   category over_represented_pvalue
## 2392 gse26495 naive vs pd1low cd8 tcell dn            8.615551e-06
##      under_represented_pvalue numDEInCat numInCat        FDR
## 2392                0.9999995          7       77 0.04392208
## 
## [1] "set_25"
##  [1] "CSRNP1"  "ID1"     "TXK"     "ZC3H12A" "ARID5B"  "CISH"    "EHBP1L1"
##  [8] "FGL2"    "GBP1"    "GBP5"    "KLF6"    "MKL1"    "NFKBIZ"  "PIM1"   
## [15] "PLAC8"   "RIC3"    "SETD5"  
## $immune
##                                                                         category
## 20            fletcher pbmc bcg 10w infant ppd stimulated vs unstimulated 10w up
## 3583                             gse3920 untreated vs ifna treated fibroblast up
## 3339                               gse36888 untreated vs il2 treated tcell 6h up
## 4891                        gse9988 anti trem1 vs anti trem1 and lps monocyte dn
## 4888                     gse9988 anti trem1 and lps vs ctrl treated monocytes up
## 2916                                       gse30971 wbp7 het vs ko macrophage dn
## 5090 van den biggelaar pbmc prevnar 9mo infant stimulated vs unstimulated 9mo up
## 2909                          gse30971 ctrl vs lps stim macrophage wbp7 ko 2h up
## 2912                           gse30971 wbp7 het vs ko macrophage 2h lps stim dn
## 2911                          gse30971 ctrl vs lps stim macrophage wbp7 ko 4h up
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 20              9.958736e-06                0.9999998          4       14
## 3583            2.558259e-05                0.9999990          5       36
## 3339            3.201994e-05                0.9999986          5       37
## 4891            4.379491e-05                0.9999980          5       36
## 4888            6.422389e-05                0.9999968          5       41
## 2916            7.157358e-05                0.9999978          4       21
## 5090            9.112306e-05                0.9999987          3        9
## 2909            1.055383e-04                0.9999963          4       23
## 2912            1.303632e-04                0.9999951          4       25
## 2911            1.346986e-04                0.9999949          4       24
##             FDR
## 20   0.05076964
## 3583 0.05441255
## 3339 0.05441255
## 4891 0.05581662
## 4888 0.06081368
## 2916 0.06081368
## 5090 0.06636363
## 2909 0.06725426
## 2912 0.06767447
## 2911 0.06767447
## 
## [1] "set_29"
##  [1] "AOAH"     "CARHSP1"  "EOMES"    "GIMAP1"   "GSTM4"    "ADGRG1"  
##  [7] "DGKD"     "IGKV3-15" "IRF9"     "ITGAM"    "KIR2DL3"  "MYO1F"   
## [13] "NCKAP1L"  "NECAP1"   "PRKCH"    "SLFN5"    "TUT7"     "YPEL1"   
## $go_bp
##                        category over_represented_pvalue
## 3048 protein kinase c signaling            1.463583e-05
##      under_represented_pvalue numDEInCat numInCat        FDR
## 3048                0.9999999          3        6 0.06865668
## 
## [1] "set_31"
##  [1] "AC004687.1" "AC007952.4" "AC025164.1" "AC025171.3" "AC087239.1"
##  [6] "AC245014.3" "AL121944.1" "AL139246.5" "CRTAM"      "ILF3-DT"   
## [11] "JAML"       "NR4A3"      "SNHG8"      "SNHG9"      "TRAV12-2"  
## [16] "TRBV3-1"    "TRBV6-2"    "TRBV7-9"   
## $go_bp
##                                                                         category
## 1083 heterophilic cell cell adhesion via plasma membrane cell adhesion molecules
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 1083            2.059281e-05                        1          2        4
##             FDR
## 1083 0.09660088
## 
## [1] "set_34"
##  [1] "GATA3"    "GPR183"   "MAP3K8"   "NR4A2"    "SLC4A4"   "TIGIT"   
##  [7] "ARHGAP10" "CSNK1G2"  "FAM160B1" "GPRIN3"   "LPCAT1"   "LRRC8A"  
## [13] "PARP14"   "RAPGEF1"  "SLC20A1"  "TBX21"    "TGFBR3"   "ZFAND3"  
## $immune
##                                                      category
## 4407 gse5542 untreated vs ifna treated epithelial cells 6h up
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4407            1.614583e-05                0.9999991          6       51
##             FDR
## 4407 0.08231146
## 
## [1] "set_40"
##  [1] "AL136454.1" "BX284668.6" "TRAV8-2"    "DDX3Y"      "EIF1AY"    
##  [6] "ETFDH"      "KDM5D"      "MAF"        "MTERF2"     "RPAP1"     
## [11] "RPS4Y1"     "SBNO2"      "TRAV17"     "TTTY15"     "UTY"       
## [16] "ZMIZ2"      "ZNF236"    
## $immune
##                                                  category
## 4352 gse5099 classical m1 vs alternative m2 macrophage dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4352            1.958376e-05                0.9999996          4       20
##             FDR
## 4352 0.09983799
saveRDS(goseq_res, sprintf("output/gene_set_enrichments_%s.RDS", 
                           file_tag))

Session information

gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  8958343 478.5   16391124 875.4         NA 16391124 875.4
## Vcells 19140580 146.1   59907924 457.1      65536 77218972 589.2
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] TxDb.Hsapiens.UCSC.hg38.knownGene_3.16.0
##  [2] GenomicFeatures_1.50.4                  
##  [3] GenomicRanges_1.50.2                    
##  [4] GenomeInfoDb_1.34.9                     
##  [5] org.Hs.eg.db_3.16.0                     
##  [6] AnnotationDbi_1.60.2                    
##  [7] IRanges_2.32.0                          
##  [8] S4Vectors_0.36.2                        
##  [9] Biobase_2.58.0                          
## [10] BiocGenerics_0.44.0                     
## [11] goseq_1.50.0                            
## [12] geneLenDataBase_1.34.0                  
## [13] BiasedUrn_2.0.10                        
## [14] fgsea_1.24.0                            
## [15] biomaRt_2.54.1                          
## [16] limma_3.54.2                            
## [17] tidyr_1.3.0                             
## [18] ggpubr_0.6.0                            
## [19] ggplot2_3.4.2                           
## [20] data.table_1.14.8                       
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-162                matrixStats_1.0.0          
##  [3] bitops_1.0-7                bit64_4.0.5                
##  [5] filelock_1.0.2              progress_1.2.2             
##  [7] httr_1.4.6                  tools_4.2.3                
##  [9] backports_1.4.1             bslib_0.4.2                
## [11] utf8_1.2.3                  R6_2.5.1                   
## [13] mgcv_1.8-42                 DBI_1.1.3                  
## [15] colorspace_2.1-0            withr_2.5.0                
## [17] tidyselect_1.2.0            prettyunits_1.1.1          
## [19] bit_4.0.5                   curl_5.0.1                 
## [21] compiler_4.2.3              cli_3.6.1                  
## [23] xml2_1.3.4                  DelayedArray_0.24.0        
## [25] rtracklayer_1.58.0          sass_0.4.5                 
## [27] scales_1.2.1                rappdirs_0.3.3             
## [29] Rsamtools_2.14.0            stringr_1.5.0              
## [31] digest_0.6.31               rmarkdown_2.21             
## [33] XVector_0.38.0              pkgconfig_2.0.3            
## [35] htmltools_0.5.5             MatrixGenerics_1.10.0      
## [37] dbplyr_2.3.2                fastmap_1.1.1              
## [39] rlang_1.1.0                 rstudioapi_0.14            
## [41] RSQLite_2.3.1               BiocIO_1.8.0               
## [43] jquerylib_0.1.4             generics_0.1.3             
## [45] jsonlite_1.8.4              BiocParallel_1.32.6        
## [47] dplyr_1.1.2                 car_3.1-2                  
## [49] RCurl_1.98-1.12             magrittr_2.0.3             
## [51] GO.db_3.16.0                GenomeInfoDbData_1.2.9     
## [53] Matrix_1.6-4                Rcpp_1.0.10                
## [55] munsell_0.5.0               fansi_1.0.4                
## [57] abind_1.4-5                 lifecycle_1.0.3            
## [59] stringi_1.7.12              yaml_2.3.7                 
## [61] carData_3.0-5               SummarizedExperiment_1.28.0
## [63] zlibbioc_1.44.0             BiocFileCache_2.6.1        
## [65] grid_4.2.3                  blob_1.2.4                 
## [67] parallel_4.2.3              crayon_1.5.2               
## [69] lattice_0.20-45             splines_4.2.3              
## [71] Biostrings_2.66.0           cowplot_1.1.1              
## [73] hms_1.1.3                   KEGGREST_1.38.0            
## [75] knitr_1.44                  pillar_1.9.0               
## [77] rjson_0.2.21                ggsignif_0.6.4             
## [79] codetools_0.2-19            fastmatch_1.1-3            
## [81] XML_3.99-0.14               glue_1.6.2                 
## [83] evaluate_0.20               png_0.1-8                  
## [85] vctrs_0.6.2                 gtable_0.3.3               
## [87] purrr_1.0.1                 cachem_1.0.7               
## [89] xfun_0.39                   broom_1.0.4                
## [91] restfulr_0.0.15             rstatix_0.7.2              
## [93] tibble_3.2.1                GenomicAlignments_1.34.1   
## [95] memoise_2.0.1